<!DOCTYPE html>
<html>
<head>
    <!-- Encoding declaration comes first so it is seen before any other text in the head. -->
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=emulateIE7">
    <title>Coverage for skf/api/chatbot/scripts/intent_classifier.py: 100%</title>
    <link rel="icon" sizes="32x32" href="favicon_32.png">
    <link rel="stylesheet" href="style.css">
    <!-- Script order is kept as-is: jQuery, then its plugins, then the report script. -->
    <script src="jquery.min.js"></script>
    <script src="jquery.hotkeys.js"></script>
    <script src="jquery.isonscreen.js"></script>
    <script src="coverage_html.js"></script>
    <script>
        // Run the per-file page setup once the DOM is ready.
        // NOTE(review): `coverage` is presumably defined by coverage_html.js; confirm.
        jQuery(document).ready(coverage.pyfile_ready);
    </script>
</head>
<body class="pyfile">
<div id="header">
    <div class="content">
        <!-- Page heading: the measured file and its overall coverage percentage. -->
        <h1>
            Coverage for <b>skf/api/chatbot/scripts/intent_classifier.py</b> :
            <span class="pc_cov">100%</span>
        </h1>
        <!-- NOTE(review): this icon is presumably the click target that opens the hot-key help panel; confirm in coverage_html.js. -->
        <img id="keyboard_icon" src="keybd_closed.png" alt="Show keyboard shortcuts">
        <!-- Statement totals. The shortkey_* and button_toggle_* classes look like script hooks, so they are kept verbatim. -->
        <h2 class="stats">
            42 statements &nbsp;
            <button class="run shortkey_r button_toggle_run" type="button" title="Toggle lines run">42 run</button>
            <button class="mis show_mis shortkey_m button_toggle_mis" type="button" title="Toggle lines missing">0 missing</button>
            <button class="exc show_exc shortkey_x button_toggle_exc" type="button" title="Toggle lines excluded">0 excluded</button>
        </h2>
    </div>
</div>
<!-- Hot-key reference panel listing the keyboard shortcuts available on this page. -->
<div class="help_panel">
    <img id="panel_icon" src="keybd_open.png" alt="Hide keyboard shortcuts">
    <p class="legend">Hot-keys on this page</p>
    <div>
    <!-- r / m / x / p: toggle which kinds of lines are displayed. -->
    <p class="keyhelp">
        <span class="key">r</span>
        <span class="key">m</span>
        <span class="key">x</span>
        <span class="key">p</span> &nbsp; toggle line displays
    </p>
    <!-- j / k: step between highlighted chunks. -->
    <p class="keyhelp">
        <span class="key">j</span>
        <span class="key">k</span> &nbsp; next/prev highlighted chunk
    </p>
    <!-- 0: jump to the top of the page. -->
    <p class="keyhelp">
        <span class="key">0</span> &nbsp; (zero) top of page
    </p>
    <!-- 1: jump to the first highlighted chunk. -->
    <p class="keyhelp">
        <span class="key">1</span> &nbsp; (one) first highlighted chunk
    </p>
    </div>
</div>
<!--
  Annotated source listing for skf/api/chatbot/scripts/intent_classifier.py.

  One <p id="tN"> row per line of the Python file (t1 to t55):
    * span.n : the line number, linked to the row's own anchor (#tN).
    * span.t : the source text, with tokens wrapped in spans for keywords (key),
               names (nam), operators and punctuation (op), string literals (str),
               numbers (num) and comments (com).
    * span.r : empty on every row of this report.
               NOTE(review): presumably reserved for per-line annotations; confirm.

  Row classes: "run" is on 42 rows, which matches the "42 run" total shown in the
  page header; "pln" is on the remaining 13 rows, all of which are blank or
  comment-only lines.

  The leading spaces inside span.t reproduce the Python indentation.
  NOTE(review): the stylesheet presumably preserves this whitespace, so do not
  reformat or re-indent the content of these rows.
-->
<div id="source">
    <p id="t1" class="run"><span class="n"><a href="#t1">1</a></span><span class="t"><span class="key">import</span> <span class="nam">os</span>&nbsp;</span><span class="r"></span></p>
    <p id="t2" class="run"><span class="n"><a href="#t2">2</a></span><span class="t"><span class="key">from</span> <span class="nam">flask</span> <span class="key">import</span> <span class="nam">Flask</span>&nbsp;</span><span class="r"></span></p>
    <p id="t3" class="run"><span class="n"><a href="#t3">3</a></span><span class="t"><span class="key">import</span> <span class="nam">pandas</span> <span class="key">as</span> <span class="nam">pd</span>&nbsp;</span><span class="r"></span></p>
    <p id="t4" class="run"><span class="n"><a href="#t4">4</a></span><span class="t"><span class="key">from</span> <span class="nam">io</span> <span class="key">import</span> <span class="nam">StringIO</span>&nbsp;</span><span class="r"></span></p>
    <p id="t5" class="run"><span class="n"><a href="#t5">5</a></span><span class="t"><span class="key">from</span> <span class="nam">sklearn</span><span class="op">.</span><span class="nam">feature_extraction</span><span class="op">.</span><span class="nam">text</span> <span class="key">import</span> <span class="nam">TfidfVectorizer</span>&nbsp;</span><span class="r"></span></p>
    <p id="t6" class="run"><span class="n"><a href="#t6">6</a></span><span class="t"><span class="key">from</span> <span class="nam">sklearn</span><span class="op">.</span><span class="nam">model_selection</span> <span class="key">import</span> <span class="nam">train_test_split</span>&nbsp;</span><span class="r"></span></p>
    <p id="t7" class="run"><span class="n"><a href="#t7">7</a></span><span class="t"><span class="key">from</span> <span class="nam">sklearn</span><span class="op">.</span><span class="nam">feature_extraction</span><span class="op">.</span><span class="nam">text</span> <span class="key">import</span> <span class="nam">CountVectorizer</span>&nbsp;</span><span class="r"></span></p>
    <p id="t8" class="run"><span class="n"><a href="#t8">8</a></span><span class="t"><span class="key">from</span> <span class="nam">sklearn</span><span class="op">.</span><span class="nam">feature_extraction</span><span class="op">.</span><span class="nam">text</span> <span class="key">import</span> <span class="nam">TfidfTransformer</span>&nbsp;</span><span class="r"></span></p>
    <p id="t9" class="run"><span class="n"><a href="#t9">9</a></span><span class="t"><span class="key">from</span> <span class="nam">sklearn</span><span class="op">.</span><span class="nam">naive_bayes</span> <span class="key">import</span> <span class="nam">MultinomialNB</span>&nbsp;</span><span class="r"></span></p>
    <p id="t10" class="pln"><span class="n"><a href="#t10">10</a></span><span class="t">&nbsp;</span><span class="r"></span></p>
    <p id="t11" class="run"><span class="n"><a href="#t11">11</a></span><span class="t"><span class="nam">app</span> <span class="op">=</span> <span class="nam">Flask</span><span class="op">(</span><span class="nam">__name__</span><span class="op">)</span>&nbsp;</span><span class="r"></span></p>
    <p id="t12" class="pln"><span class="n"><a href="#t12">12</a></span><span class="t">&nbsp;</span><span class="r"></span></p>
    <p id="t13" class="run"><span class="n"><a href="#t13">13</a></span><span class="t"><span class="key">def</span> <span class="nam">get_data</span><span class="op">(</span><span class="op">)</span><span class="op">:</span>&nbsp;</span><span class="r"></span></p>
    <p id="t14" class="run"><span class="n"><a href="#t14">14</a></span><span class="t">    <span class="nam">df</span> <span class="op">=</span> <span class="nam">pd</span><span class="op">.</span><span class="nam">read_csv</span><span class="op">(</span><span class="nam">os</span><span class="op">.</span><span class="nam">path</span><span class="op">.</span><span class="nam">join</span><span class="op">(</span><span class="nam">app</span><span class="op">.</span><span class="nam">root_path</span><span class="op">,</span> <span class="str">"../datasets/intent_data.csv"</span><span class="op">)</span><span class="op">)</span>&nbsp;</span><span class="r"></span></p>
    <p id="t15" class="run"><span class="n"><a href="#t15">15</a></span><span class="t">    <span class="key">return</span> <span class="nam">df</span>&nbsp;</span><span class="r"></span></p>
    <p id="t16" class="pln"><span class="n"><a href="#t16">16</a></span><span class="t">&nbsp;</span><span class="r"></span></p>
    <p id="t17" class="pln"><span class="n"><a href="#t17">17</a></span><span class="t">&nbsp;</span><span class="r"></span></p>
    <p id="t18" class="run"><span class="n"><a href="#t18">18</a></span><span class="t"><span class="key">def</span> <span class="nam">data_prepare</span><span class="op">(</span><span class="op">)</span><span class="op">:</span>&nbsp;</span><span class="r"></span></p>
    <p id="t19" class="run"><span class="n"><a href="#t19">19</a></span><span class="t">    <span class="nam">col</span> <span class="op">=</span> <span class="op">[</span><span class="str">'classs'</span><span class="op">,</span><span class="str">'question'</span><span class="op">]</span>&nbsp;</span><span class="r"></span></p>
    <p id="t20" class="run"><span class="n"><a href="#t20">20</a></span><span class="t">    <span class="nam">y</span><span class="op">=</span><span class="nam">get_data</span><span class="op">(</span><span class="op">)</span>&nbsp;</span><span class="r"></span></p>
    <p id="t21" class="run"><span class="n"><a href="#t21">21</a></span><span class="t">    <span class="nam">y</span> <span class="op">=</span> <span class="nam">y</span><span class="op">[</span><span class="nam">col</span><span class="op">]</span>&nbsp;</span><span class="r"></span></p>
    <p id="t22" class="run"><span class="n"><a href="#t22">22</a></span><span class="t">    <span class="nam">y</span> <span class="op">=</span> <span class="nam">y</span><span class="op">[</span><span class="nam">pd</span><span class="op">.</span><span class="nam">notnull</span><span class="op">(</span><span class="nam">y</span><span class="op">[</span><span class="str">'question'</span><span class="op">]</span><span class="op">)</span><span class="op">]</span>&nbsp;</span><span class="r"></span></p>
    <p id="t23" class="run"><span class="n"><a href="#t23">23</a></span><span class="t">    <span class="nam">y</span><span class="op">.</span><span class="nam">columns</span> <span class="op">=</span> <span class="op">[</span><span class="str">'classs'</span><span class="op">,</span><span class="str">'question'</span><span class="op">]</span>&nbsp;</span><span class="r"></span></p>
    <p id="t24" class="run"><span class="n"><a href="#t24">24</a></span><span class="t">    <span class="nam">y</span><span class="op">[</span><span class="str">'category_id'</span><span class="op">]</span> <span class="op">=</span> <span class="nam">y</span><span class="op">[</span><span class="str">'classs'</span><span class="op">]</span><span class="op">.</span><span class="nam">factorize</span><span class="op">(</span><span class="op">)</span><span class="op">[</span><span class="num">0</span><span class="op">]</span>&nbsp;</span><span class="r"></span></p>
    <p id="t25" class="run"><span class="n"><a href="#t25">25</a></span><span class="t">    <span class="nam">category_id_df</span> <span class="op">=</span> <span class="nam">y</span><span class="op">[</span><span class="op">[</span><span class="str">'classs'</span><span class="op">,</span> <span class="str">'category_id'</span><span class="op">]</span><span class="op">]</span><span class="op">.</span><span class="nam">drop_duplicates</span><span class="op">(</span><span class="op">)</span><span class="op">.</span><span class="nam">sort_values</span><span class="op">(</span><span class="str">'category_id'</span><span class="op">)</span>&nbsp;</span><span class="r"></span></p>
    <p id="t26" class="run"><span class="n"><a href="#t26">26</a></span><span class="t">    <span class="nam">category_to_id</span> <span class="op">=</span> <span class="nam">dict</span><span class="op">(</span><span class="nam">category_id_df</span><span class="op">.</span><span class="nam">values</span><span class="op">)</span>&nbsp;</span><span class="r"></span></p>
    <p id="t27" class="run"><span class="n"><a href="#t27">27</a></span><span class="t">    <span class="nam">id_to_category</span> <span class="op">=</span> <span class="nam">dict</span><span class="op">(</span><span class="nam">category_id_df</span><span class="op">[</span><span class="op">[</span><span class="str">'category_id'</span><span class="op">,</span> <span class="str">'classs'</span><span class="op">]</span><span class="op">]</span><span class="op">.</span><span class="nam">values</span><span class="op">)</span>&nbsp;</span><span class="r"></span></p>
    <p id="t28" class="run"><span class="n"><a href="#t28">28</a></span><span class="t">    <span class="key">return</span> <span class="nam">y</span>&nbsp;</span><span class="r"></span></p>
    <p id="t29" class="pln"><span class="n"><a href="#t29">29</a></span><span class="t">&nbsp;</span><span class="r"></span></p>
    <p id="t30" class="pln"><span class="n"><a href="#t30">30</a></span><span class="t">&nbsp;</span><span class="r"></span></p>
    <p id="t31" class="run"><span class="n"><a href="#t31">31</a></span><span class="t"><span class="key">def</span> <span class="nam">naive_algo</span><span class="op">(</span><span class="op">)</span><span class="op">:</span>&nbsp;</span><span class="r"></span></p>
    <p id="t32" class="run"><span class="n"><a href="#t32">32</a></span><span class="t">    <span class="nam">tfidf</span> <span class="op">=</span> <span class="nam">TfidfVectorizer</span><span class="op">(</span><span class="nam">sublinear_tf</span><span class="op">=</span><span class="key">True</span><span class="op">,</span> <span class="nam">min_df</span><span class="op">=</span><span class="num">5</span><span class="op">,</span> <span class="nam">norm</span><span class="op">=</span><span class="str">'l2'</span><span class="op">,</span> <span class="nam">encoding</span><span class="op">=</span><span class="str">'latin-1'</span><span class="op">,</span> <span class="nam">ngram_range</span><span class="op">=</span><span class="op">(</span><span class="num">1</span><span class="op">,</span> <span class="num">2</span><span class="op">)</span><span class="op">,</span> <span class="nam">stop_words</span><span class="op">=</span><span class="str">'english'</span><span class="op">)</span>&nbsp;</span><span class="r"></span></p>
    <p id="t33" class="run"><span class="n"><a href="#t33">33</a></span><span class="t">    <span class="nam">df</span><span class="op">=</span><span class="nam">data_prepare</span><span class="op">(</span><span class="op">)</span>&nbsp;</span><span class="r"></span></p>
    <p id="t34" class="run"><span class="n"><a href="#t34">34</a></span><span class="t">    <span class="nam">features</span> <span class="op">=</span> <span class="nam">tfidf</span><span class="op">.</span><span class="nam">fit_transform</span><span class="op">(</span><span class="nam">df</span><span class="op">.</span><span class="nam">question</span><span class="op">)</span><span class="op">.</span><span class="nam">toarray</span><span class="op">(</span><span class="op">)</span>&nbsp;</span><span class="r"></span></p>
    <p id="t35" class="run"><span class="n"><a href="#t35">35</a></span><span class="t">    <span class="nam">labels</span> <span class="op">=</span> <span class="nam">df</span><span class="op">.</span><span class="nam">category_id</span>&nbsp;</span><span class="r"></span></p>
    <p id="t36" class="run"><span class="n"><a href="#t36">36</a></span><span class="t">    <span class="nam">features</span><span class="op">.</span><span class="nam">shape</span>&nbsp;</span><span class="r"></span></p>
    <p id="t37" class="run"><span class="n"><a href="#t37">37</a></span><span class="t">    <span class="nam">X_train</span><span class="op">,</span> <span class="nam">X_test</span><span class="op">,</span> <span class="nam">y_train</span><span class="op">,</span> <span class="nam">y_test</span> <span class="op">=</span> <span class="nam">train_test_split</span><span class="op">(</span><span class="nam">df</span><span class="op">[</span><span class="str">'question'</span><span class="op">]</span><span class="op">,</span> <span class="nam">df</span><span class="op">[</span><span class="str">'classs'</span><span class="op">]</span><span class="op">,</span> <span class="nam">random_state</span> <span class="op">=</span> <span class="num">0</span><span class="op">)</span>&nbsp;</span><span class="r"></span></p>
    <p id="t38" class="run"><span class="n"><a href="#t38">38</a></span><span class="t">    <span class="nam">count_vect</span> <span class="op">=</span> <span class="nam">CountVectorizer</span><span class="op">(</span><span class="op">)</span>&nbsp;</span><span class="r"></span></p>
    <p id="t39" class="run"><span class="n"><a href="#t39">39</a></span><span class="t">    <span class="nam">X_train_counts</span> <span class="op">=</span> <span class="nam">count_vect</span><span class="op">.</span><span class="nam">fit_transform</span><span class="op">(</span><span class="nam">X_train</span><span class="op">)</span>&nbsp;</span><span class="r"></span></p>
    <p id="t40" class="run"><span class="n"><a href="#t40">40</a></span><span class="t">    <span class="nam">tfidf_transformer</span> <span class="op">=</span> <span class="nam">TfidfTransformer</span><span class="op">(</span><span class="op">)</span>&nbsp;</span><span class="r"></span></p>
    <p id="t41" class="run"><span class="n"><a href="#t41">41</a></span><span class="t">    <span class="nam">X_train_tfidf</span> <span class="op">=</span> <span class="nam">tfidf_transformer</span><span class="op">.</span><span class="nam">fit_transform</span><span class="op">(</span><span class="nam">X_train_counts</span><span class="op">)</span>&nbsp;</span><span class="r"></span></p>
    <p id="t42" class="run"><span class="n"><a href="#t42">42</a></span><span class="t">    <span class="nam">clf</span> <span class="op">=</span> <span class="nam">MultinomialNB</span><span class="op">(</span><span class="op">)</span><span class="op">.</span><span class="nam">fit</span><span class="op">(</span><span class="nam">X_train_tfidf</span><span class="op">,</span> <span class="nam">y_train</span><span class="op">)</span>&nbsp;</span><span class="r"></span></p>
    <p id="t43" class="run"><span class="n"><a href="#t43">43</a></span><span class="t">    <span class="key">return</span> <span class="nam">clf</span><span class="op">,</span><span class="nam">count_vect</span>&nbsp;</span><span class="r"></span></p>
    <p id="t44" class="pln"><span class="n"><a href="#t44">44</a></span><span class="t">&nbsp;</span><span class="r"></span></p>
    <p id="t45" class="pln"><span class="n"><a href="#t45">45</a></span><span class="t">&nbsp;</span><span class="r"></span></p>
    <p id="t46" class="run"><span class="n"><a href="#t46">46</a></span><span class="t"><span class="key">def</span> <span class="nam">predict</span><span class="op">(</span><span class="nam">question</span><span class="op">)</span><span class="op">:</span>&nbsp;</span><span class="r"></span></p>
    <p id="t47" class="run"><span class="n"><a href="#t47">47</a></span><span class="t">    <span class="nam">clf</span><span class="op">,</span><span class="nam">count_vect</span><span class="op">=</span><span class="nam">naive_algo</span><span class="op">(</span><span class="op">)</span>&nbsp;</span><span class="r"></span></p>
    <p id="t48" class="run"><span class="n"><a href="#t48">48</a></span><span class="t">    <span class="nam">intent</span><span class="op">=</span><span class="nam">clf</span><span class="op">.</span><span class="nam">predict</span><span class="op">(</span><span class="nam">count_vect</span><span class="op">.</span><span class="nam">transform</span><span class="op">(</span><span class="op">[</span><span class="nam">question</span><span class="op">]</span><span class="op">)</span><span class="op">)</span>&nbsp;</span><span class="r"></span></p>
    <p id="t49" class="run"><span class="n"><a href="#t49">49</a></span><span class="t">    <span class="nam">intent</span><span class="op">=</span><span class="nam">str</span><span class="op">(</span><span class="nam">intent</span><span class="op">)</span><span class="op">.</span><span class="nam">strip</span><span class="op">(</span><span class="str">"['']"</span><span class="op">)</span>&nbsp;</span><span class="r"></span></p>
    <p id="t50" class="run"><span class="n"><a href="#t50">50</a></span><span class="t">    <span class="key">return</span> <span class="nam">intent</span>&nbsp;</span><span class="r"></span></p>
    <p id="t51" class="pln"><span class="n"><a href="#t51">51</a></span><span class="t">&nbsp;</span><span class="r"></span></p>
    <p id="t52" class="pln"><span class="n"><a href="#t52">52</a></span><span class="t"><span class="com">##ques=input("Enter your question ")</span>&nbsp;</span><span class="r"></span></p>
    <p id="t53" class="pln"><span class="n"><a href="#t53">53</a></span><span class="t"><span class="com">##x=predict(ques)</span>&nbsp;</span><span class="r"></span></p>
    <p id="t54" class="pln"><span class="n"><a href="#t54">54</a></span><span class="t"><span class="com">##intent=str(x).strip("['']")</span>&nbsp;</span><span class="r"></span></p>
    <p id="t55" class="pln"><span class="n"><a href="#t55">55</a></span><span class="t">&nbsp;</span><span class="r"></span></p>
</div>
<div id="footer">
    <div class="content">
        <!-- Link back to the report index, plus the generator's version and the report timestamp. -->
        <p>
            <a class="nav" href="index.html">« index</a>
            &nbsp; &nbsp;
            <a class="nav" href="https://coverage.readthedocs.io">coverage.py v5.5</a>,
            created at 2021-03-26 13:45 +0100
        </p>
    </div>
</div>
</body>
</html>
